//
// Project: Disagreement in science: Missing women



clear all
version 15.1  



//
// set locals

// method of identifying female variable: name of the gender variable to use
// NOTE(review): presumably a female indicator derived from genderize -- confirm.
// The local female is not referenced in the part of the file visible here.
local female "female_genderize"

// gender of author is known: condition used in the regression samples below.
// It compares against system missing (.) only, so extended missing values
// (.a, .b, ...) would count as known.
local known_gender "female_genderize!=."




//
// PNAS 

// call data
// article_author_id is the row number in file order; it is the key used for
// the editor merge below, so it must be generated before any rows are dropped
use "${data}/output/pnas_data_gender.dta", clear
generate article_author_id = _n

// name suffixes erroneously scraped as separate author-article observations
drop if inlist(full_name, "II", "III", "IV", "Jr", "Jr.")
// NOTE(review): the reason for excluding 2020 is not stated in this file -- confirm
drop if year==2020
keep if comment | research_article

// running count of each author's publications in the journal, ordered by
// article_id; the count includes the current article and is top-coded at 5
// (previous_pubs is _n and therefore never missing)
bysort full_name (article_id): generate previous_pubs = _n
generate previous_pubs_bin = min(previous_pubs, 5)

// PNAS started comments in 2008: keep only those years, now that publications
// from earlier years have been counted
drop if year<2008  

// last author of an article = observation with the highest author_id in it
egen last_author_id = max(author_id), by(article_id)
generate last_author = author_id==last_author_id

// identify papers that receive comments
generate commented = call_by!=.

// merge with editor info; keep(master match) prevents the merge from
// re-adding, as using-only rows, the observations dropped above
merge 1:1 article_author_id using "${data}/output/pnas_editors_match_to_authors.dta", keep(master match)
replace is_editor=0 if is_editor==.

// has been an editor before writing the paper: switch the flag off when the
// paper's volume precedes the author's earliest volume as editor
generate is_editor_before_paper = is_editor
replace is_editor_before_paper = 0 if is_editor_before_paper==1 & volume<earliest_volume_as_editor

// analysis: linear probability model on last authors of non-comment papers
// with known gender, with year fixed effects and robust standard errors
local i=1
// columns of r(table) whose p-values are kept; with a 0/1 editor indicator,
// column 2 is 1.is_editor_before_paper and column 3 is previous_pubs_bin
local coefficients`i' "2 3"
regress commented i.is_editor_before_paper previous_pubs_bin i.year if comment==0 & `known_gender' & last_author==1, vce(robust)
matrix m`i' = r(table)
scalar n`i' = e(N)
scalar rsq`i' = e(r2)
// p-values (row 4 of r(table))
foreach k of numlist `coefficients`i'' {
	local p`i'_`k' = m`i'[4,`k']
}
// overall predictive margin over the estimation sample
margins 
matrix a`i' = r(b)



//
// Science

// call data
// article_author_id is the row number in file order; it is the key used for
// the editor merge below, so it must be generated before any rows are dropped
use "${data}/output/science_data_gender.dta", clear
generate article_author_id = _n
// NOTE(review): the reason for excluding 2020 is not stated in this file -- confirm
drop if year==2020
keep if comment | research_article

// running count of each author's publications in the journal, ordered by
// article_id; the count includes the current article and is top-coded at 5
// (previous_pubs is _n and therefore never missing)
bysort full_name (article_id): generate previous_pubs = _n
generate previous_pubs_bin = min(previous_pubs, 5)

// last author of an article = observation with the highest author_id in it
egen last_author_id = max(author_id), by(article_id)
generate last_author = author_id==last_author_id

// identify papers that receive comments
generate commented = call_by_1!=.

// merge with editor info; keep(master match) prevents the merge from
// re-adding, as using-only rows, the observations dropped above
merge 1:1 article_author_id using "${data}/output/science_editors_match_to_authors.dta", keep(master match)
replace is_editor=0 if is_editor==.

// has been an editor before writing the paper: switch the flag off when the
// paper's volume precedes the author's earliest volume as editor
generate is_editor_before_paper = is_editor
replace is_editor_before_paper = 0 if is_editor_before_paper==1 & volume<earliest_volume_as_editor

// analysis: linear probability model on last authors of non-comment papers
// with known gender, with year fixed effects and robust standard errors.
// The sample is restricted to volume 324 onward
// NOTE(review): presumably the first volume with comments in these data -- confirm
local i=2
// columns of r(table) whose p-values are kept; with a 0/1 editor indicator,
// column 2 is 1.is_editor_before_paper and column 3 is previous_pubs_bin
local coefficients`i' "2 3"
regress commented i.is_editor_before_paper previous_pubs_bin i.year if comment==0 & `known_gender' & last_author==1 & volume>=324, vce(robust)
matrix m`i' = r(table)
scalar n`i' = e(N)
scalar rsq`i' = e(r2)
// p-values (row 4 of r(table))
foreach k of numlist `coefficients`i'' {
	local p`i'_`k' = m`i'[4,`k']
}
// overall predictive margin over the estimation sample
margins 
matrix a`i' = r(b)




//
// significance stars for regressions above
// star coding: * p<0.1, ** p<0.05, *** p<0.01, **** p<0.001, empty otherwise
// (a missing p-value fails every test and therefore gets the empty string)
forvalues j=1/2 {
	foreach k of numlist `coefficients`j'' {
		// p-value stored for regression j, r(table) column k
		local pval "`p`j'_`k''"
		// test the strictest threshold first so each branch needs one bound
		if `pval'<0.001 {
			scalar st`j'_`k'="****"
		}
		else if `pval'<0.01 {
			scalar st`j'_`k'="***"
		}
		else if `pval'<0.05 {
			scalar st`j'_`k'="**"
		}
		else if `pval'<0.1 {
			scalar st`j'_`k'="*"
		}
		else {
			scalar st`j'_`k'=""
		}
	}
}
	
